***************
* Title: gambia_ecd_edcc_tableA1.do
* Author: Todd Pugatch
* Description: replication code for Blimpo, Carneiro, Jervis, and Pugatch,
*	"Improving Access and Quality in Early Childhood Development Programs: 
*		Experimental Evidence from The Gambia"
*	for Economic Development and Cultural Change
* Inputs: ECD_3to6_Gambia_cleanv1.dta
* Outputs: gambia_ecd_edcc_tableA1.txt, gambia_ecd_edcc_tableA1[a-f].xls
* Notes: creates Table A1
****************
#delimit;
/* Session setup: record the start time, clear anything left in memory
   (data, matrices, Mata, graphs), close any log that is still open,
   and open the text log for this table. */
local start=`"$S_TIME"';
clear;
clear matrix;
clear mata;
graph drop _all;
cap log close;
set more off;
/*set directory:
	cd mydir
*/
/* Input and output folders, relative to the working directory set above.
   NOTE(review): the backslash separators are Windows-style. Forward slashes
   are accepted by Stata on every platform; switch all paths in this file
   together if the script must run on Mac or Linux. */
local data=`"Data\cleaned"';
local output=`"analysis\output"';
/* Log is written to the same folder as the tables. The path is built from the
   output local so that changing the folder above also moves the log. */
log using `output'\gambia_ecd_edcc_tableA1.txt, text replace;

* LOAD AND PREPARE DATA;
/* Read the cleaned child-level file and exclude settlement 37028, whose
   treatment status still needs to be checked (disregarded for now). */
quietly {;
	use `data'\ECD_3to6_Gambia_cleanv1, clear;
	keep if settlement_code!=37028;
};

* define 3 groups:
	--in baseline
	--in endline (original sample)
	--in endline (newly sampled);
/* in_baseline: ip22 is observed and is not code 3.
   NOTE(review): presumably ip22 is the baseline interview result and 3 marks
   a child who was not interviewed -- confirm against the codebook.
   missing() is used rather than a comparison with . so that extended missing
   codes (.a to .z) are not counted as valid baseline interviews. */
qui gen in_baseline=(ip22!=3 & !missing(ip22));
/* in_endline: endline interview result equals 1 */
qui gen in_endline=(interview_result==1);
/* split the endline sample by whether the child was also in the baseline */
qui gen in_endline_old=(in_endline==1 & in_baseline==1);
qui gen in_endline_new=(in_endline==1 & in_baseline==0);
/*treat unresolved gender mismatches as new to endline*/
qui replace in_endline_new=1 if in_endline_new==0 & child_gender_mismatch_resolved==0;
/* a child flagged as new cannot also count as original sample: set to missing (not 0) */
qui replace in_endline_old=. if in_endline_new==1;

* repeat for having valid MDAT fine motor and language/hearing scores;
/*note that "in sample" defined as having at least one valid MDAT score, not both as in previous analyses of attrition*/
/* missing() is used instead of a comparison with . so that extended missing
   codes (.a to .z) are never counted as a valid score. Each score is tested
   separately because missing(a,b) is true when EITHER argument is missing,
   which would turn the rule into "both scores valid". */
qui gen in_baseline_mdat=(in_baseline==1 & (!missing(zfinemotor_baseline)|!missing(zlanghear_baseline)));
qui replace in_baseline_mdat=. if in_endline_new==1;
foreach x in endline endline_old endline_new {;
	qui gen in_`x'_mdat=(in_`x'==1 & (!missing(zfinemotor_endline)|!missing(zlanghear_endline)));
};
/* children new to endline are undefined (missing), not 0, for the original-sample indicator */
qui replace in_endline_old_mdat=. if in_endline_new==1;
qui gen in_endline_old_mdat_base=in_endline_old_mdat;
qui replace in_endline_old_mdat_base=. if in_baseline_mdat!=1; /*in baseline MDAT & in endline MDAT*/
	
* define indicators for treatment;
/* One 0/1 dummy per experimental arm, keyed on the value of treatment.
   Each expression evaluates to 0 when treatment is missing. */
quietly {;
	generate communitybased=(treatment==6);
	generate purecontrol=(treatment==1);
	generate ECDAnnex_control=(treatment==4);
	generate ECDAnnex_treated=(treatment==5);
};

* keep only children aged 3-6 with valid baseline interview;
/* Snapshot the full dataset first: it is reloaded for the endline analysis and
   erased at the end of the script.
   NOTE(review): this writes a scratch file into the data folder; a tempfile
   would avoid leaving it behind if the script stops early. */
qui save `data'\ecdtemp, replace;
/* age window is 36 to 83 months (DOB-based), restricted to children in the baseline */
local age_ok "child_age_mths_dob>=36 & child_age_mths_dob<84 & child_age_mths_dob!=.";
qui keep if (`age_ok') & in_baseline==1;

* sample sizes (# of children and project sites);
/* site tags one observation per settlement, so its raw sum counts settlements */
qui egen site=tag(settlement_code);
table treatment, contents(freq rawsum site);

* define set of baseline covariates: MDAT aggregates and subsets;
/* built in three groups, then concatenated in the same order as before */
local X_scores "zfinemotor_baseline zlanghear_baseline zfinemotor_adj_baseline zlanghear_adj_baseline";
local X_langhear "langhear_name_pct_baseline langhear_sentence_baseline langhear_count_pct_baseline langhear_colors_pct_baseline";
local X_finemotor "finemotor_blocks_pct_baseline finemotor_draw_pct_baseline finemotor_order_pct_baseline";
local X "`X_scores' `X_langhear' `X_finemotor'";
	
* BALANCE TESTS;
* get unadjusted differences (clustering standard errors by settlement), but adjust p-values for regional stratification;
* analyze community-based ECD and ECD Annex experiments separately;
* can verify that output is identical to regression-based version of gambia_ecd_balance[6-7].do; 

/* sample restriction for each experiment, and the orth_out options shared by every call below */
local annex_arms "ECDAnnex_control==1|ECDAnnex_treated==1";
local cb_arms "purecontrol==1|communitybased==1";
local bal_opts "by(treatment) se vce(cluster settlement_code) compare count colnum";

* unadjusted means & differences by treatment status;
/*account for correlated outcomes by settlement by clustering at that level*/
/*table A1a: ECD Annex treated v. ECD Annex control*/
orth_out `X' if `annex_arms' using `output'\gambia_ecd_edcc_tableA1a.xls, 
	`bal_opts' title("unadjusted means") replace;

/*table A1b: community-based v. pure control*/
orth_out `X' if `cb_arms' using `output'\gambia_ecd_edcc_tableA1b.xls, 
	`bal_opts' title("unadjusted means") replace;

* adjusted means, adjusting for regional stratification; 
/*same two comparisons with region2 as covariate, appended horizontally to the tables above*/
/*table A1a: ECD Annex treated v. ECD Annex control*/
orth_out `X' if `annex_arms' using `output'\gambia_ecd_edcc_tableA1a.xls, 
	`bal_opts' test covar(region2) title("adjusted means") 
	happend replace;

/*table A1b: community-based v. pure control*/
orth_out `X' if `cb_arms' using `output'\gambia_ecd_edcc_tableA1b.xls, 
	`bal_opts' test covar(region2) title("adjusted means") 
	happend replace;

* ANALYSIS OF ENDLINE RESULTS;
* analyze with and without controlling for baseline outcome;
* prep sample;
/* reload the full dataset saved before the baseline restrictions were applied */
qui use `data'\ecdtemp, clear;
/* keep baseline children aged 36 to 83 months, plus children absent from the
   baseline who have no DOB-based age. missing() is used rather than a
   comparison with . so that extended missing codes (.a to .z) also count
   as "no age recorded". */
keep if (child_age_mths_dob>=36 & child_age_mths_dob<84 & in_baseline==1)|(missing(child_age_mths_dob) & in_baseline==0);

* keep if new to endline and would have been 3-6 at baseline (define as 4-8 at endline to allow errors);
/* inrange() is 0 when selected_child_age is missing, so no separate missing check is needed.
   New children with an unresolved gender mismatch are kept regardless of age. */
keep if in_endline_new==0|(in_endline_new==1 & inrange(selected_child_age,48,96))|
	(in_endline_new==1 & child_gender_mismatch_resolved==0);

/* endline analysis uses only children with in_endline equal to 1 */
qui drop if in_endline==0;

* sample sizes (# of children and project sites);
/* site tags one observation per settlement, so its raw sum counts settlements */
qui egen site=tag(settlement_code);
table treatment, c(freq rawsum site);
table treatment, c(rawsum in_baseline_mdat rawsum in_endline_mdat rawsum in_endline_old_mdat rawsum in_endline_new_mdat);
	
/*endline outcomes: built in groups, then concatenated in the same order as before*/
local Y_scores "zfinemotor_endline zlanghear_endline zfinemotor_adj_endline zlanghear_adj_endline";
local Y_langhear "langhear_name_endline langhear_sentence_endline langhear_count_pct_endline langhear_colors_pct_endline
	langhear_blocks_pct_endline langhear_books_pct_endline";
local Y_finemotor "finemotor_blocks_pct_endline finemotor_draw_pct_endline finemotor_order_pct_endline finemotor_fold_endline";
local Y_other "mdat_blocks_pct_endline";
local Y "`Y_scores' `Y_langhear' `Y_finemotor' `Y_other'";

* get unadjusted differences (clustering standard errors by settlement), but adjust p-values for regional stratification;
* analyze community-based ECD and ECD Annex experiments separately;
* can verify that output is identical to regression-based version of gambia_ecd_results9.do; 	

/* sample restriction for each experiment, and the orth_out options shared by every call below */
local annex_arms "ECDAnnex_control==1|ECDAnnex_treated==1";
local cb_arms "purecontrol==1|communitybased==1";
local end_opts "by(treatment) se vce(cluster settlement_code) count colnum";

* WITHOUT control for baseline outcomes;
/*unadjusted means by treatment status, with standard errors clustered by settlement*/
/*table A1c: ECD Annex treated v. ECD Annex control*/
orth_out `Y' if `annex_arms' using `output'\gambia_ecd_edcc_tableA1c.xls, 
	`end_opts' 
	title("unadjusted means, without baseline outcome") replace;

/*table A1d: community-based v. pure control*/
orth_out `Y' if `cb_arms' using `output'\gambia_ecd_edcc_tableA1d.xls, 
	`end_opts' 
	title("unadjusted means, without baseline outcome") replace;

/*treatment effects, adjusted for regional stratification*/
/*same two comparisons with region2 as covariate, appended horizontally to the tables above*/
orth_out `Y' if `annex_arms' using `output'\gambia_ecd_edcc_tableA1c.xls, 
	`end_opts' covar(region2) compare stars
	title("treatment effects, without baseline outcome") happend replace;

orth_out `Y' if `cb_arms' using `output'\gambia_ecd_edcc_tableA1d.xls, 
	`end_opts' covar(region2) compare stars
	title("treatment effects, without baseline outcome") happend replace;


* WITH control for baseline outcomes;
/*treatment effects on each endline outcome, with the baseline value of the same outcome
  and region2 as covariates, and standard errors clustered by settlement*/
/*limit analysis to items with both baseline & endline measures*/
/*NOTE(review): the balance-test list uses langhear_name_pct_baseline, whereas the stem
  langhear_name below resolves to langhear_name_baseline -- confirm that variable exists*/
local Y "zfinemotor zlanghear zfinemotor_adj zlanghear_adj 
	langhear_name langhear_sentence langhear_count_pct langhear_colors_pct
	finemotor_blocks_pct finemotor_draw_pct finemotor_order_pct";	

/*blank out baseline values for children who were not in the baseline or who are treated
  as new to endline. Done once for all outcomes: the condition does not depend on the
  experiment, so repeating it for each table is unnecessary.*/
foreach y of local Y {;
	qui replace `y'_baseline=. if in_baseline==0|in_endline_new==1;
};

/*sample restriction for each output table:
	e = ECD Annex treated v. ECD Annex control
	f = community-based v. pure control*/
local arms_e "ECDAnnex_control==1|ECDAnnex_treated==1";
local arms_f "purecontrol==1|communitybased==1";

foreach panel in e f {;
	/*first outcome overwrites previous results; later outcomes are stacked below it*/
	local append "";
	foreach y of local Y {;
		qui orth_out `y'_endline if `arms_`panel'' 
			using `output'\gambia_ecd_edcc_tableA1`panel'.xls, by(treatment) se count colnum compare stars
			vce(cluster settlement_code) covar(`y'_baseline region2)
			title("treatment effects, with baseline outcome") `append' replace;
		local append "vappend";
	};
};

/* Wrap up: delete the scratch copy of the dataset, print the start and end
   clock times so the run time can be read from the log, and close the log. */
erase `data'\ecdtemp.dta;
local end "$S_TIME";
display "`start'";
display "`end'";
log close;
